*************************************************************************************************
/*																								
	Purpose: 																
	-Clean NSBA_rawdata.dta													
	-Attach 1950 ANES occ codes to nsba occupations 											
*/																								
*************************************************************************************************

* Start from an empty session and turn off output paging so the file runs unattended.
clear 
set more off

* All relative paths below resolve against the NSBA folder under the
* user-defined global Mydirectory1, which must be set before this file is run.
cd "$Mydirectory1/1_DataSources/NSBA/"

use ./input/NSBA_rawdata.dta, clear //Download from the ICPSR website (search for ICPSR 8512); the file name is the same.
quietly do ./code/08512-0001-Supplemental_syntax.do //Download from the ICPSR website. NOTE(review): presumably applies the study's labels and missing-value codes; its contents are not visible here -- confirm.

* Replace the leading upper-case "V" of the raw variable names with "v" (e.g. V1586 becomes v1586).
rename V* v*

*** Weight
/* No sampling weight was provided. Codebook states 
   that the sample was self-weighting, and every Black 
   American household in the continental US had the 
   same probability of being selected.
*/

* Constant weight of 1 for every respondent, consistent with the self-weighting design described above.
gen weight_nsba =1

/*------------------------------------------------------------------------------------------
											CLEANING
------------------------------------------------------------------------------------------*/

***********************************************
* Demographics *
***********************************************

*-- Gender: keep the raw coding of v1586 under a readable name.
rename v1586 sex
tabulate sex, missing

*-- Age and age squared. The sample is then restricted to ages 30 through 50
*   (rows with missing age are dropped as well).
rename v2012 age
generate agesq = age * age
drop if !inrange(age, 30, 50)

*-- Marital status: one 0/1 dummy per v908 code, left missing when v908 is missing.
*   The two lists are positional: married is code 1, never_married is 5,
*   widowed is 4, divorced is 2, separated is 3.
local marital_vars  married never_married widowed divorced separated
local marital_codes 1 5 4 2 3
forvalues i = 1/5 {
	local status : word `i' of `marital_vars'
	local code   : word `i' of `marital_codes'
	generate `status' = (v908 == `code') if !missing(v908)
	tabulate `status', missing
}

*-- Race: constant, because the sample consists of Black Americans.
generate race = 2
label variable race "Respondent's Race (re-coded as white, black, missing)"

generate black = (race == 2)
label variable black "Dummy =1 if Respondent is Black"

*Father race is not available.

******************
* Foreign Born *
******************

* Respondent. v1405 is the birthplace code: values of 66000 and above are
* treated as foreign, as are the 2-digit codes 73, 74, 76 and 78.
generate foreignborn = .
replace foreignborn = 0 if v1405 < 66000
replace foreignborn = 1 if v1405 >= 66000 & !missing(v1405)
replace foreignborn = 1 if inlist(v1405, 73, 74, 76, 78)

* Undetermined cases are set to missing:
*   - 97 does not appear to exist in the codebook;
*   - for 3-digit codes (100-950) it is unclear whether they are US counties or places outside the US.
replace foreignborn = . if v1405 == 97 | inrange(v1405, 100, 950)

tabulate foreignborn, missing
label variable foreignborn "Respondent is foreignborn"

* NOTE(review): code 78 is flagged as foreign-born above, while the birthplace section
* further down recodes 64 and 64051 to 78 (Virgin Islands). Rows carrying 64 or 64051
* are therefore NOT flagged here and stay in the sample -- confirm this is intended.

* Keep the US-born and the undetermined; only confirmed foreign-born respondents are removed.
drop if foreignborn == 1

*Father foreign--not available

*-------------------------------------------------------------------------------------------------*
*-------------------------------------------------------------------------------------------------*

*************************************************************************************
* State/Region Where Respondent Grew Up or Was Born *
*************************************************************************************

***********************
* State R was born in *
***********************

* v1405 matches 5-digit state+county FIPS codes for all 50 states, but not for territories.
* Both Virgin Islands codes are mapped to 78.
replace v1405 = 78 if v1405 == 64 | v1405 == 64051

* Values between 28 and 78 are taken to be 2-digit state codes and are used as they are.
* NOTE(review): any 2-digit code below 28 is not tagged; it is zero-padded to "000NN" below,
* yields state 0 and is then set to missing. Confirm no such codes occur in v1405.
generate tag = inrange(v1405, 28, 78)
tabulate tag, missing

* Tagged rows keep the code; all others take the first two digits (the state part)
* of the zero-padded 5-digit state+county code.
generate bpl = v1405 if tag == 1
replace bpl = real(substr(string(v1405, "%05.0f"), 1, 2)) if tag != 1
* A result of 0 means R has a 3-digit code from an indeterminate country.
replace bpl = . if bpl == 0
* "Hawaiian coast" (59) is folded into Hawaii (15).
replace bpl = 15 if bpl == 59
tabulate bpl, missing
label variable bpl "R Birthplace, FIPS codes"

************************
* Region R was born in *
************************

generate region4_born = .
* 1 = Northeast: CT, ME, MA, NH, RI, VT, NJ, NY, PA
replace region4_born = 1 if inlist(bpl, 9, 23, 25, 33, 44, 50, 34, 36, 42)
* 2 = Midwest: IL, IN, MI, OH, WI, IA, KS, MN, MO, NE, ND, SD
replace region4_born = 2 if inlist(bpl, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
* 3 = South: DE, DC, FL, GA, MD, NC, SC, VA, WV, AL, KY, MS, TN, AR, LA, OK, TX
replace region4_born = 3 if inlist(bpl, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
* 4 = West: AZ, CO, ID, MT, NV, NM, UT, WY, CA, OR, WA, plus AK (2) and HI (15)
replace region4_born = 4 if inlist(bpl, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)
tabulate region4_born, missing
label variable region4_born "Region R born"

drop tag

******************************************
* Whether R was born in the south *
******************************************

* 0/1 dummy, left missing when the birth region is unknown.
generate bornsouth = (region4_born == 3) if region4_born != .
tabulate bornsouth, missing

******************************************
* State and Region R grew up in *
******************************************

*State
* Codes of 66000 and above are outside the US and carry no state information.
replace v1407=. if v1407>=66000

* Correct state fips codes for US territories. The birthplace variable treats both 64 and
* 64051 as the Virgin Islands; the same two codes are handled here so that the
* birth and childhood state variables are coded consistently.
replace v1407 = 78 if inlist(v1407,64,64051)

* Values between 17 and 78 are taken to be 2-digit state codes and are used as they are.
* NOTE(review): any 2-digit code below 17 is not tagged; it is zero-padded to "000NN" below,
* yields state 0 and is then set to missing. Confirm no such codes occur in v1407.
gen tag = inrange(v1407,17,78)
tab tag, m

* Untagged rows take the first two digits (the state part) of the zero-padded 5-digit code.
gen state_childhood = real(substr(string(v1407,"%05.0f"),1,2)) if tag~=1
replace state_childhood = v1407 if tag==1
* state_childhood of 0 means that R has a 3-digit code from an indeterminate country.
replace state_childhood =. if state_childhood==0
* "Hawaiian coast" (59) is folded into Hawaii (15).
replace state_childhood =15 if state_childhood==59
tab state_childhood, m
label var state_childhood "State R grew up in, FIPS codes"

*Region
gen fips = state_childhood

gen region4_childhood=.
* 1 = Northeast: CT, ME, MA, NH, RI, VT, NJ, NY, PA
replace region4_childhood=1 if inlist(fips, 9, 23, 25, 33, 44, 50, 34, 36, 42)
* 2 = Midwest: IL, IN, MI, OH, WI, IA, KS, MN, MO, NE, ND, SD
replace region4_childhood=2 if inlist(fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
* 3 = South: DE, DC, FL, GA, MD, NC, SC, VA, WV, AL, KY, MS, TN, AR, LA, OK, TX
replace region4_childhood=3 if inlist(fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
* 4 = West: AZ, CO, ID, MT, NV, NM, UT, WY, CA, OR, WA, plus AK (2) and HI (15)
replace region4_childhood=4 if inlist(fips, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)

label var region4_childhood "Region R grew up"

drop fips tag

******************************************
* Region R currently resides in *
******************************************

*Note: AK and HI are included in the "4", or "west" category.
rename v2004 region4
tab region4, m

* One shared value label for the three 4-category region variables.
label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
label values region4 region4_born region4_childhood region_l
tab region4_born, m
tab region4_childhood, m
tab region4, m

******************************************
* Whether R has moved region since birth *
******************************************

* First pass: flag a move whenever the current region differs from the birth region or
* from the childhood region. A missing value compares as "different" in Stata, so the
* corrections below are required.
gen moved_region = (region4~=region4_born | region4~=region4_childhood)

* Nothing is known about R's origin: undefined.
replace moved_region =. if (region4_born==. & region4_childhood==.)
* Only one origin region is known and it equals the current region: not a mover.
replace moved_region =0 if (region4==region4_born & region4_childhood==. & region4~=.)
replace moved_region =0 if (region4==region4_childhood & region4_born==. & region4~=.)
* The current region itself is unknown: the comparison above would otherwise
* count these respondents as movers, so the flag is set to missing instead.
replace moved_region =. if missing(region4)
tab moved_region, m
label var moved_region "Current region differs from birth or childhood region"

*-------------------------------------------------------------------------------------------------*
*-------------------------------------------------------------------------------------------------*

*****************
* Employment *
*****************

* 0/1 dummy for v301 equal to 1, left missing when v301 is missing.
generate employed = (v301 == 1) if !missing(v301)
tabulate employed, missing

*self employed--unavailable

*******************************
* Education-Respondent *
*******************************

rename v1434 higrade
tabulate higrade, missing

* Seven-category attainment built from the highest grade completed.
* NOTE(review): the top category is open-ended (any non-missing value above 15), whereas
* yrsschool_bin below stops at values under 20. If higrade carries numeric codes of 20 or
* more, the two variables disagree for those rows -- confirm against the codebook.
generate eduR = .
replace eduR = 0 if higrade == 0
replace eduR = 1 if higrade >= 1 & higrade < 8
replace eduR = 2 if higrade == 8
replace eduR = 3 if higrade > 8 & higrade < 12
replace eduR = 4 if higrade == 12
replace eduR = 5 if higrade > 12 & higrade < 16
replace eduR = 6 if higrade > 15 & higrade < .
tabulate eduR, missing
* eduR codes: 0 none, 1 some grade school, 2 completed 8th grade, 3 some HS,
*             4 four years of HS, 5 one to three years of college, 6 four or more years of college

rename higrade yrsschool
label variable yrsschool "Years of school"
tabulate yrsschool, missing nolabel

* Representative number of years for each attainment band.
generate yrsschool_bin = .
replace yrsschool_bin = 0  if yrsschool == 0
replace yrsschool_bin = 6  if yrsschool > 0 & yrsschool < 8
replace yrsschool_bin = 8  if yrsschool == 8
replace yrsschool_bin = 10 if yrsschool > 8 & yrsschool < 12
replace yrsschool_bin = 12 if yrsschool == 12
replace yrsschool_bin = 14 if yrsschool > 12 & yrsschool < 16
replace yrsschool_bin = 16 if yrsschool >= 16 & yrsschool < 20
tabulate yrsschool_bin, missing
label variable yrsschool_bin "Years of school, binned"

* Attainment dummies derived from eduR (missing when eduR is missing).
generate hs_ed = (eduR >= 4) if !missing(eduR)
tabulate hs_ed, missing
label variable hs_ed "HS educated"

generate coll_ed = (eduR >= 6) if !missing(eduR)
tabulate coll_ed, missing
label variable coll_ed "Coll educated"

*********************
* Education-Parents *
*********************

rename (v1470 v1475) (dad_ed_raw mom_ed_raw)

* Pass 1: seven-category attainment for each parent, using the same bands as for R.
foreach parent in mom dad {
	local raw `parent'_ed_raw
	local cat edu_`parent'

	* Code 96 is treated as missing.
	replace `raw' = . if `raw' == 96

	generate `cat' = .
	replace `cat' = 0 if `raw' == 0
	replace `cat' = 1 if `raw' > 0 & `raw' < 8
	replace `cat' = 2 if `raw' == 8
	replace `cat' = 3 if `raw' > 8 & `raw' < 12
	replace `cat' = 4 if `raw' == 12
	replace `cat' = 5 if `raw' > 12 & `raw' < 16
	replace `cat' = 6 if `raw' == 16 | `raw' == 17
	* Categories: 0 none, 1 some grade school, 2 completed 8th grade, 3 some HS,
	*             4 four years of HS, 5 some college, 6 college
	label variable `cat' "Educational categories for `parent'"
	tabulate `cat', missing
}

* Pass 2: years-of-school bins and HS / college dummies.
* Category k (0 through 6) maps to the (k+1)-th entry of bin_years.
local bin_years 0 6 8 10 12 14 16
foreach parent in mom dad {
	generate edu_`parent'_bin = .
	forvalues k = 0/6 {
		local yrs : word `=`k' + 1' of `bin_years'
		replace edu_`parent'_bin = `yrs' if edu_`parent' == `k'
	}
	tabulate edu_`parent'_bin, missing
	label variable edu_`parent'_bin "`parent' Years of school from bins"

	generate `parent'_hs_ed = (edu_`parent' >= 4) if !missing(edu_`parent')
	tabulate `parent'_hs_ed, missing

	generate `parent'_coll_ed = (edu_`parent' >= 6) if !missing(edu_`parent')
	tabulate `parent'_coll_ed, missing

	label variable `parent'_hs_ed "`parent' HS educated"
	label variable `parent'_coll_ed "`parent' College educated"
}

* Pass 3: give the raw years variables their final names.
foreach parent in dad mom {
	rename `parent'_ed_raw yrsschool_`parent'
	tabulate yrsschool_`parent', missing
}

************
* Siblings *
************

* Number of brothers (v1569) and sisters (v1570). The two lists are positional.
local sib_raw  v1569 v1570
local sib_kind brothers sisters
forvalues j = 1/2 {
	local raw  : word `j' of `sib_raw'
	local kind : word `j' of `sib_kind'

	tabulate `raw', missing
	* Code 80 marks an only child, so the count is zero.
	replace `raw' = 0 if `raw' == 80
	tabulate `raw', missing
	rename `raw' R_num_`kind'
	label variable R_num_`kind' "# of R's `kind'"
}

* Total number of siblings (v1568), with the same treatment of code 80.
replace v1568 = 0 if v1568 == 80
tabulate v1568, missing
rename v1568 R_num_siblings
label variable R_num_siblings "# of R's siblings"

*****************
* Own fertility *
*****************

* Not available in this survey:
*   - number of (living) boys, and the flag for an indeterminate (high) number of boys
*   - number of (living) girls, and the flag for an indeterminate (high) number of girls

* Number of living kids (v963); code 80 means none living.
replace v963 = 0 if v963 == 80
tabulate v963, missing
rename v963 R_numkids_living
label variable R_numkids_living "# of R's kids who are living"
* Give the recoded zero a readable entry in the existing V963 value label.
label define V963 0 "0", modify

* Number of kids ever (v962); code 80 is an indeterminate number of adopted,
* foster, or stepchildren and is set to missing.
replace v962 = . if v962 == 80
tabulate v962, missing
rename v962 R_numkids_ever
label variable R_numkids_ever "# of kids that R has ever had (living or deceased)"
* When the ever-count is missing but a positive living-count is known, use the living-count.
replace R_numkids_ever = R_numkids_living if R_numkids_ever == . & R_numkids_living > 0 & !missing(R_numkids_living)

* Number of deceased kids: ever minus living, defined only when both are known.
generate R_numkids_dead = R_numkids_ever - R_numkids_living if R_numkids_ever != . & R_numkids_living != .
tabulate R_numkids_dead, missing
label variable R_numkids_dead "# of R's kids who are deceased"

* Dummy: has R ever had kids?
generate R_kids_ever = (R_numkids_ever != 0) if !missing(R_numkids_ever)
tabulate R_kids_ever, missing
label variable R_kids_ever "Dummy=1 if R has ever had kids"

* Dummy: does R have living kids right now?
generate R_kids_now = (R_numkids_living != 0) if !missing(R_numkids_living)
tabulate R_kids_now, missing

*********************************
* # of persons in R's household *
*********************************

* Number of kids (of any age) living in R's hh--unavailable

* v1851: children aged 0-17 living in R's household.
* v1849: total persons in the household unit; R appears to be included in this count.
rename (v1851 v1849) (R_totnumkids_0to17_livinginhh R_hhsize_plusR)
label variable R_totnumkids_0to17_livinginhh "Total # of kids (0-17) living in R's hh"

* Household size excluding the respondent.
generate R_hhsize_minusR = R_hhsize_plusR - 1
tabulate R_hhsize_minusR, missing

label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"

******************
* Unions *
******************

/* unionR equals 1 when R's job is covered by a union contract, regardless of
   whether R belongs to the union. Figures recorded by the original author:
   only 12% of covered respondents do not belong to the union, and only 3.5%
   of sample respondents are not in a union. */
generate unionR = (v324 == 1) if !missing(v324)
tabulate unionR, missing

*******************
* Veterans *
*******************

*Unavailable

/*------------------------------------------------------------------------------------------

Crosswalking 1979-1980 NSBA occupations to 1950 ANES occupations for fathers and mothers

------------------------------------------------------------------------------------------*/

***********************
*                     *
*    Crosswalking     *
*                     *
***********************

* The same occupation codes are used for the father and the mother item.
rename (v1471 v1477) (dad_occ mom_occ)

local xwalk_path "../Crosswalks/NSBA_crosswalk.dta"

foreach parent in dad mom {

	* Name that the crosswalked 1950 code receives for this parent.
	if "`parent'" == "dad" local target fatheroccej
	else local target motheroccej

	* Make a temporary copy of the crosswalk in which occ1950ej carries that name.
	preserve
		use "`xwalk_path'", clear
		rename occ1950ej `target'
		tempfile xw
		save `xw'
	restore

	* Merge key: the crosswalk is keyed on a variable called occ.
	generate occ = `parent'_occ
	label variable occ "==`parent'_occ (renamed to facilitate a merge)"

	merge m:1 occ using `xw'

	* Only codes 99, 95 and missing are allowed to go unmatched.
	tabulate occ if _merge == 1, missing
	assert (occ == 99 | occ == 95 | occ == .) if _merge == 1

	* Discard crosswalk rows that no respondent uses, then clean up the helpers.
	drop if _merge == 2
	drop _merge occ

}

/* Head-of-household coding, following the coding in other surveys:
--headofhh_father =1 if R lived with both parents when growing up or if R reports having lived with only a father.
--headofhh_mother =1 if R lived with a mother but not a father.
--headofhh_othermale =1 if R lived with a male relative and not with R's parents.
--headofhh_otherfemale =1 if R lived with a female relative and not with R's parents or a male relative.
*/

	* Collect the childhood living-arrangement items through their variable labels.
	ds, has(varlabel "*<16 LIVE*")
	display "`r(varlist)'"
	global famcomp = "`r(varlist)'"

	* Position in this list gives the suffix of the per-item missing flag (v1553 is 1, v1565 is 13).
	local hh_items v1553 v1554 v1555 v1556 v1557 v1558 v1559 v1560 v1561 v1562 v1563 v1564 v1565

	foreach f of global famcomp {
		tabulate `f', missing
		* Code 5 is recoded to 0 so that each item becomes a 0/1 indicator.
		replace `f' = 0 if `f' == 5
		tabulate `f', missing

		* The suffix is only updated for variables found in the list.
		local pos : list posof "`f'" in hh_items
		if `pos' > 0 local n `pos'

		* Flag for a missing answer on this item.
		generate test1_`n' = (`f' == .)
		tabulate test1_`n', missing
	}

	* Number of items with a missing answer; 13 means nothing is known about the household.
	egen test1 = rowtotal(test1*)
	tabulate test1, missing //Only a couple Rs should have "." for the head of hh variables

	* Building blocks. Relationship names are taken from the original comments:
	* v1554/v1557 father or stepfather, v1553/v1556 mother or step-mother,
	* v1559/v1561/v1564 grandfather, uncle, great-grandfather,
	* v1558/v1560/v1563 grandmother, aunt, great-grandmother.
	local dad_present   "(v1554==1 | v1557==1)"
	local no_dad        "(v1554==0 & v1557==0)"
	local mom_present   "(v1553==1 | v1556==1)"
	local no_parents    "(v1554==0 & v1557==0 & v1553==0 & v1556==0)"
	local male_present  "(v1559==1 | v1561==1 | v1564==1)"
	local no_other_male "(v1559==0 & v1561==0 & v1564==0)"
	local fem_present   "(v1558==1 | v1560==1 | v1563==1)"

	generate headofhh_father      = 1 if `dad_present'
	generate headofhh_mother      = 1 if `no_dad' & `mom_present'
	generate headofhh_othermale   = 1 if `no_parents' & `male_present'
	generate headofhh_otherfemale = 1 if `no_parents' & `no_other_male' & `fem_present'

	* Everyone else gets 0, unless all 13 items are missing (then the dummy stays missing).
	foreach role in father mother othermale otherfemale {
		replace headofhh_`role' = 0 if headofhh_`role' == . & test1 != 13
		tabulate headofhh_`role', missing
	}

	* Alternate father-as-head dummy: when a crosswalked father occupation is present
	* but nothing is known about who R lived with, assume R lived with the father.
	* headofhh_father being missing identifies exactly the Rs with no household information.
	generate headofhh_father_imputed = headofhh_father
	replace headofhh_father_imputed = 1 if fatheroccej != . & headofhh_father == .
	label variable headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"
	tabulate headofhh_father_imputed, missing

**************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials *
**************************************************************************
/* There is no variable for the "class" of work a father did (gov't, private,
   self-employed, etc.), so father occupation cannot be adjusted for self-employment. */


*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************

	* 0 when a crosswalked occupation exists, 1 when it is missing and the raw code
	* is the not-working code, missing otherwise.
	* NOTE(review): the father test uses raw code 96 while the mother test uses 95 -- confirm
	* against the codebook that both are the intended codes.
	generate father_notworking = 0 if fatheroccej != .
	replace father_notworking = 1 if dad_occ == 96 & fatheroccej == .
	tabulate father_notworking, missing

	generate mother_notworking = 0 if motheroccej != .
	replace mother_notworking = 1 if mom_occ == 95 & motheroccej == .
	tabulate mother_notworking, missing

* Father was either a farm laborer or a farm operator (1950 codes 71 and 81);
* missing when the father's crosswalked occupation is missing.
generate fatherfarm = inlist(fatheroccej, 71, 81) if fatheroccej != .
tabulate fatherfarm, missing

* Free up the name fatheroccej so the next crosswalk can be merged in.
rename fatheroccej father_occ1950ej


/*------------------------------------------------------------------------------------------

Crosswalking 1979-1980 NSBA occupations to 1950 ANES occupations for adult Rs

------------------------------------------------------------------------------------------*/

***********************
*                     *
*    Crosswalking     *
*                     *
***********************

* R's own occupation. The merge key must be called census1970 to match the crosswalk.
rename v307 son_occ
generate census1970 = son_occ
label variable census1970 "==son_occ (renamed to facilitate a merge)"

sort census1970

local xwalk_1970 "../Crosswalks/Crosswalk_1970Census_toANES.dta"
merge m:1 census1970 using "`xwalk_1970'"

* Every non-missing occupation code must find a match in the crosswalk.
assert census1970 == . if _merge == 1
* Discard crosswalk rows that no respondent uses.
drop if _merge == 2
drop _merge census1970

**************************************************************************
* Fix occupations for self-employed businessmen, managers, or officials *
**************************************************************************
/* Self-employment is not observed in this survey, so R's occupation cannot be adjusted. */

* Rename to match the variable names in other data files: the fatheroccej that came in
* with this merge holds R's own code, and the father's code gets its usual name back.
rename fatheroccej occRej
rename father_occ1950ej fatheroccej

/*------------------------------------------------------------------------------------------

									Family Income

------------------------------------------------------------------------------------------*/

/*
Each income bin is assigned its midpoint, with two exceptions:
 (1) the top bin is open-ended ("30,000 or more"), so its lower bound is multiplied by 1.25;
 (2) the bottom bin is assigned its upper bound multiplied by 0.75.
*/
local bottom_value = 0.75*3000
local top_value    = 1.25*30000

generate fam_inc = .
replace fam_inc = `bottom_value' if inrange(v1499, 1, 4)
replace fam_inc = 4000           if inlist(v1499, 5, 6)
replace fam_inc = 6000           if inlist(v1499, 7, 8)
replace fam_inc = 8000           if inlist(v1499, 9, 10)
replace fam_inc = 10500          if inlist(v1499, 11, 12)
replace fam_inc = 13500          if v1499 == 13
replace fam_inc = 17500          if v1499 == 14
replace fam_inc = 22500          if v1499 == 15
replace fam_inc = 27500          if v1499 == 16
replace fam_inc = `top_value'    if v1499 == 17
* Bins in order: 0-3k, 3-5k, 5-7k, 7-9k, 9-12k, 12-15k, 15-20k, 20-25k, 25-30k, 30k+
tabulate fam_inc, missing
label variable fam_inc "Family income, binned (based on midpoints of each bin)"

/* The suffix "_son" only mirrors the variable names used in other datasets;
   every respondent with a non-missing fam_inc receives a value. */
generate bottomcoded_son = (fam_inc == `bottom_value') if !missing(fam_inc)
tabulate bottomcoded_son, missing

generate topcoded_son = (fam_inc == `top_value') if !missing(fam_inc)
tabulate topcoded_son, missing

label variable bottomcoded_son "Respondent family income, bottom coded"
label variable topcoded_son "Respondent family income, top coded"

/* Convert fam_inc to 1950 dollars using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 */
generate CPI1950 = 24.1
generate CPI1978 = 65.2 //Family income is measured in 1978.

generate fam_inc_real = fam_inc * (CPI1950/CPI1978)
label variable fam_inc_real "Binned Family income, in 1950 dollars"

generate lnfaminc = ln(fam_inc_real)
label variable lnfaminc "Logged family income, binned and real"

/*------------------------------------------------------------------------------------------

									Birth cohorts

------------------------------------------------------------------------------------------*/

generate year = 1980
label variable year "Survey year"

generate dob = v1403
tabulate dob, missing
label variable dob "Year of birth"

* Decade of birth for the 1920s through the 1950s; any other birth year stays missing.
generate decade = .
forvalues start = 1920(10)1950 {
	replace decade = `start' if inrange(dob, `start', `start' + 9)
}
tabulate decade, missing
label variable decade "Decade of birth"

* One indicator variable per decade that occurs in the data (decade_1, decade_2, ...).
tabulate decade, generate(decade_)

/*------------------------------------------------------------------------------------------

									Interactions

------------------------------------------------------------------------------------------*/

	* Variables that enter the interaction terms.
	global institution_list "unionR black hs_ed coll_ed"

* Demean each of them: <var>_dm is <var> minus its (unweighted) sample mean.
* Note that black equals 1 for every respondent in this file, so black_dm is 0 throughout.
	foreach var in $institution_list {
		summarize `var'
		* r(mean) is used directly rather than through its macro-expanded text,
		* so the stored mean is subtracted at full precision.
		generate `var'_dm = `var' - r(mean)

		* Carry the source label over. A variable without a label (unionR has none)
		* falls back to its name, so the new label never starts with a bare comma.
		local temp : variable label `var'
		if `"`temp'"' == "" local temp "`var'"
		label variable `var'_dm "`temp', demeaned"
	}

/*------------------------------------------------------------------------------------------

										Save

------------------------------------------------------------------------------------------*/

* Indicator for respondents without a usable family income.
generate faminc_missing = (fam_inc_real == .)
label variable faminc_missing "Family income missing"

* v1 is the interview number and serves as the unique identifier.
rename v1 id_nsba
label variable id_nsba "INTERVIEW NUMBER (unique identifier)"

* Informational check only; the original author recorded that no duplicates were reported.
duplicates report id_nsba

* Shrink storage types, put the identifier and weight first, and write the cleaned file.
compress
sort id_nsba
order id_nsba weight_nsba
save ./output/cleaned_nsba, replace
